{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 一、 最值归一化 normalization\n",
    "* 把所有数据映射到0~1之间\n",
    "* 适用于分布有明显边界的情况, 受outlier影响较大\n",
    "### $x_{scale} = \\frac{x - x_{min}}{x_{max} - x_{min}}$\n",
    "\n",
    "$\\frac{x-1}{y}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 对于一维向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([86, 76, 61, 80, 32, 30,  7, 47, 24, 59, 24, 89, 70, 84,  1, 20, 75,\n",
       "       42, 61, 30, 68, 54, 60, 99, 33,  7, 54, 92, 32, 21, 44, 66, 49, 51,\n",
       "       44, 89, 98, 43, 28, 18, 77, 37, 25, 72, 67,  7, 69, 88, 59,  5, 92,\n",
       "        1, 14, 57, 36, 87, 92,  3, 81, 53, 11, 38,  8,  2, 83,  6,  7, 28,\n",
       "       79,  9, 51, 64, 23, 16, 51, 90, 81, 18, 41, 39, 87, 26, 88,  1,  0,\n",
       "       83, 64, 69, 22, 20, 75, 52, 31, 70, 84, 59, 45, 49, 57, 58])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.random.randint(0,100,100)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.86868687, 0.76767677, 0.61616162, 0.80808081, 0.32323232,\n",
       "       0.3030303 , 0.07070707, 0.47474747, 0.24242424, 0.5959596 ,\n",
       "       0.24242424, 0.8989899 , 0.70707071, 0.84848485, 0.01010101,\n",
       "       0.2020202 , 0.75757576, 0.42424242, 0.61616162, 0.3030303 ,\n",
       "       0.68686869, 0.54545455, 0.60606061, 1.        , 0.33333333,\n",
       "       0.07070707, 0.54545455, 0.92929293, 0.32323232, 0.21212121,\n",
       "       0.44444444, 0.66666667, 0.49494949, 0.51515152, 0.44444444,\n",
       "       0.8989899 , 0.98989899, 0.43434343, 0.28282828, 0.18181818,\n",
       "       0.77777778, 0.37373737, 0.25252525, 0.72727273, 0.67676768,\n",
       "       0.07070707, 0.6969697 , 0.88888889, 0.5959596 , 0.05050505,\n",
       "       0.92929293, 0.01010101, 0.14141414, 0.57575758, 0.36363636,\n",
       "       0.87878788, 0.92929293, 0.03030303, 0.81818182, 0.53535354,\n",
       "       0.11111111, 0.38383838, 0.08080808, 0.02020202, 0.83838384,\n",
       "       0.06060606, 0.07070707, 0.28282828, 0.7979798 , 0.09090909,\n",
       "       0.51515152, 0.64646465, 0.23232323, 0.16161616, 0.51515152,\n",
       "       0.90909091, 0.81818182, 0.18181818, 0.41414141, 0.39393939,\n",
       "       0.87878788, 0.26262626, 0.88888889, 0.01010101, 0.        ,\n",
       "       0.83838384, 0.64646465, 0.6969697 , 0.22222222, 0.2020202 ,\n",
       "       0.75757576, 0.52525253, 0.31313131, 0.70707071, 0.84848485,\n",
       "       0.5959596 , 0.45454545, 0.49494949, 0.57575758, 0.58585859])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(x - np.min(x))/(np.max(x)-np.min(x))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 对于二维矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[68, 85],\n",
       "       [54, 44],\n",
       "       [36, 14],\n",
       "       [38, 68],\n",
       "       [51, 63],\n",
       "       [50,  9],\n",
       "       [34, 77],\n",
       "       [37, 85],\n",
       "       [96, 81],\n",
       "       [93, 64]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.random.randint(0,100, (50,2))\n",
    "x[:10, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[68., 85.],\n",
       "       [54., 44.],\n",
       "       [36., 14.],\n",
       "       [38., 68.],\n",
       "       [51., 63.],\n",
       "       [50.,  9.],\n",
       "       [34., 77.],\n",
       "       [37., 85.],\n",
       "       [96., 81.],\n",
       "       [93., 64.]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 上面randint生成的是int类型, 而我们转换为0-1之间后是float类型\n",
    "# 数据需要转换\n",
    "x = np.array(x, dtype=float)\n",
    "x[:10, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.68041237, 0.86458333],\n",
       "       [0.53608247, 0.4375    ],\n",
       "       [0.35051546, 0.125     ],\n",
       "       [0.37113402, 0.6875    ],\n",
       "       [0.50515464, 0.63541667],\n",
       "       [0.49484536, 0.07291667],\n",
       "       [0.32989691, 0.78125   ],\n",
       "       [0.36082474, 0.86458333],\n",
       "       [0.96907216, 0.82291667],\n",
       "       [0.93814433, 0.64583333]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 将第0列数据进行归一化\n",
    "x[:,0] = (x[:,0] - np.min(x[:,0]))/(np.max(x[:,0] - np.min(x[:,0])))\n",
    "x[:,1] = (x[:,1] - np.min(x[:,1]))/(np.max(x[:,1] - np.min(x[:,1])))\n",
    "x[:10, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFihJREFUeJzt3X+MXOdZ6PHvUyehy6V0ETYSWXtrA06EFSO5WpIgS1BouXGDFFtRAQdFUBTVAhT4A4jkqlVBoVcxrbjoopt7wbq3KgWR9IeQWZEgI3CqoggHb2RIGldGJi3NbioSoM4/dckPHv6YcTKezHrP7Jwz58d8P1LUnZm3u++ZnX38nOc873siM5Ekdctb6p6AJKl8BndJ6iCDuyR1kMFdkjrI4C5JHWRwl6QOMrhLUgcZ3CWpgwzuktRB19T1g7du3Zo7d+6s68dLUis9+eST/5qZ2zYaV1tw37lzJysrK3X9eElqpYj45yLjLMtIUgcZ3CWpgwzuktRBBndJ6iCDuyR1kMFdkjrI4C5JHbRhcI+IT0TECxHxxXVej4j4vYi4EBFPRcQ7y5+mJGkcRRYxfRL438Cn1nn9vcDu/n+3AP+3/7+tdOLsGh8/eZ7nL17i+vk57rvtRg7tW6h7WpI0lg0z98z8AvDvVxlyEPhU9pwG5iPiu8ua4DSdOLvGB//0adYuXiKBtYuX+OCfPs2Js2t1T02SxlJGzX0BeG7g8Wr/udb5+MnzXHrltSueu/TKa3z85PmaZiRJm1NGcI8Rz+XIgRFHImIlIlZefPHFEn50uZ6/eGms5yWpqcoI7qvAjoHH24HnRw3MzOOZuZSZS9u2bbip2dRdPz831vOS1FRlBPdl4Gf7XTO3Ai9l5tdK+L5Td99tNzJ37ZYrnpu7dgv33XZjTTOSpM3ZsFsmIh4C3gVsjYhV4DeAawEy8/eBR4HbgQvAN4Cfr2qyVbvcFWO3jKS2i8yR5fHKLS0tpfu5S9J4IuLJzFzaaFxtN+uQ2sB1D2org7u0jsvrHi63x15e9wAY4NV47i0jrcN1D2ozg7u0Dtc9qM0M7tI6XPegNjO4S+tw3YPazAuq0jpc96A2M7hLV3Fo34LBXK1kWUaSOsjgLkkdZFmmRK5mlNQUBveSuJpRUpNYlimJqxklNYnBvSSuZpTUJJZlSnL9/BxrIwK5qxlVNq/tqAgz95K4mlHTcPnaztrFSyRvXNs5cXat7qmpYczcS+JqRk3D1a7t+FlrprrOtAzuJXI1o6rmtZ12qbOLzuDeAl2osXbhGJpgM9d2fO/rU+eZljX3hutCjbULx9AU417b8b2vV51nWgb3hrvav/wnzq6x/9gpdh19hP3HTjX2D9Y1AOU5tG+BB+7cy8L8HAEszM/xwJ17180Cfe/rVec9ASzLNNx6/8JfzsDasCLWOnG5xrm243tfr/tuu/GKv1OYXhedmfuQpmXD6/0LvyWiNRmZdzSqj+99vcY90yqTwX1AE+uT69VYX8scOb6JGZlrAOrje1+/Q/sWePzoj/HlYz/B40d/bGpn1gb3AU2sT673L/9CizKyOrOXWed7P7usuQ9oan1yvRprXbW8zXANQH1872eTwX1Am/aHcUWsVL8mryEwuA+o88r2ZpiRSfVp+j0crLkPsD4pqagmXqMbZOY+xGxYUhFNvUZ3WaHMPSIORMT5iLgQEUdHvL4YEY9FxNmIeCoibi9/qpLUHE1fQ7BhcI+ILcCDwHuBPcBdEbFnaNiHgc9k5j7gMPB/yp6oJDVJ09cQFCnL3AxcyMxnASLiYeAgcG5gTALf3v/67cDzZU5Skpqm6R1rRYL7AvDcwONV4JahMb8J/GVE/DLw34D3jPpGEXEEOAKwuLg47lwlqVGafI2uSM09Rjw3vPb9LuCTmbkduB34o4h40/fOzOOZuZSZS9u2bRt/tmqkpu3HI6lY5r4K7Bh4vJ03l13u
AQ4AZObfRsRbga3AC2VMUs3V9F5faVYVydzPALsjYldEXEfvguny0JivAu8GiIjvB94KvFjmRNVMTe/1lWbVhsE9M18F7gVOAl+i1xXzTETcHxF39If9GvCBiPgH4CHg/ZnrbFuoTml6r680qwotYsrMR4FHh577yMDX54D95U5NbdCm/XikWeL2A5pI03t9pVnl9gOaSNN7faVZZXDXxJrc6yvNKssyktRBBndJ6iDLMppZTb6LjtqtCZ8tg7tmkitrVZWmfLYsy2gmubJWVWnKZ8vgrpnkylpVpSmfLYO7ZlLT76Kj9mrKZ8vgrpnkylpVpSmfLS+oqrUm6UhwZa2q0pTPVtS1eePS0lKurKzU8rPVfsMdCdDLjh64c68BWp0WEU9m5tJG48zcNXVl9ABfrSPB4C4Z3DVlZfUAN6UjQWoqg/sMmSRjLmvFXVkZt/vIS1dnt8yMuJwxr128RPJGxlzkZtaT/H+HlZVxN6UjQWoqg/uMmGTVXJkr7srqAT60b4EH7tzLwvwcASzMz3kxVRpgWWZGTJIxl1nfvu+2G0d2uWwm43YfeWl9Zu4zYpKMucwVd2bc0nSYuc+ISTLmMrNtMOOWpsHgPiMmWTXXlBV30rQ0YT/2SblCVZIGNH31sytUpU368ImneeiJ53gtky0R3HXLDj56aG/d09KUdGX1s8FdGvDhE0/zx6e/+vrj1zJff2yAnw1dWf1st4w04KEnnhvreXVPU/Zjn5TBXRrw2jrXoNZ7Xt3TldXPlmXUSZvtdtgSMTKQb4moYppqoK50hxnc1TmT7Dx51y07rqi5Dz6v2dGFtRiFyjIRcSAizkfEhYg4us6Yn4qIcxHxTET8SbnTlIqbZC+cjx7ay923Lr6eqW+J4O5bF72YqtbZMHOPiC3Ag8CPA6vAmYhYzsxzA2N2Ax8E9mfm1yPiu6qasLSRSbsdPnpor8FcrVckc78ZuJCZz2bmy8DDwMGhMR8AHszMrwNk5gvlTlMqrivdDtIkigT3BWCwD2y1/9ygG4AbIuLxiDgdEQfKmqA0rq50O0iTKHJBdVSbwHA7wTXAbuBdwHbgbyLipsy8eMU3ijgCHAFYXFwce7JSEV3pdpAmUSS4rwKDrQLbgedHjDmdma8AX46I8/SC/ZnBQZl5HDgOvb1lNjtpaSNd6HaQJlGkLHMG2B0RuyLiOuAwsDw05gTwowARsZVemebZMicqSSpuw+Cema8C9wIngS8Bn8nMZyLi/oi4oz/sJPBvEXEOeAy4LzP/rapJS5Kuzi1/JbVCF/ZYL4Nb/krqjElWHc8qg3tFzDKk8nRlj/VpMrhXwCxDKldX9lifJrf8rcAke5tIejNXHY/P4F4BswypXK46Hp/BvQJmGVK5Du1b4IE797IwP0cAC/NzjblhdVNZc6/AfbfdOPLu6WYZ0ua56ng8BvcKuLeJpLoZ3CtiliGpTtbcJamDDO6S1EEGd0nqIIO7JHWQF1RVmcH9dd4+dy0RcPEbr9g9JE2BwV2VGN5f5+KlV15/zb12pOpZllElRu2vM8i9dqRqGdxViSL76LjXjlQdyzKqxPXzc6xtELzda+fNvA+AymLmrkqM2sVvkHvtvNnl6xRrFy+RvHFt4sTZtbqnphYyuKsSw7v4zc9dy3d867Xu6HcV3gdAZbIso8q4v854vA+AymTmLjWE9wFQmQzuUkN4tyGVybKMtAlVdLV4HwCVyeAujWl49W2ZK269TqGyWJaRxmRXi9rA4C6Nya4WtYHBXRqTXS1qA4O7NCa7WtQGXlCVxmRXi9qgUHCPiAPA/wK2AP8vM4+tM+59wGeBH8zMldJmKTWMXS1qug3LMhGxBXgQeC+wB7grIvaMGPc24FeAJ8qepCRpPEVq7jcDFzLz2cx8GXgYODhi3G8BHwO+WeL8JEmbUKQsswA8N/B4FbhlcEBE7AN2ZOafR8Svr/eNIuIIcARgcXFx/NkOcN9ryb8Dra9IcI8Rz+XrL0a8Bfhd
4P0bfaPMPA4cB1haWsoNhq+ryhWCUlv4d6CrKVKWWQV2DDzeDjw/8PhtwE3A5yPiK8CtwHJELJU1yWGuEJT8O6jLibNr7D92il1HH2H/sVONvZlKkcz9DLA7InYBa8Bh4Gcuv5iZLwFbLz+OiM8Dv15lt4wrBCX/DurQprOlDTP3zHwVuBc4CXwJ+ExmPhMR90fEHVVPcBRXCEr+HdShTWdLhVaoZuajmXlDZn5vZv6P/nMfyczlEWPfVXWPuysEJf8O6tCms6VWrlB1haDk30Edrp+fY21EIG/i2VJkbrppZSJLS0u5suIiVkntMVxzh97Z0jRv+B4RT2bmhg0rrczcJakObTpbMrhL0hjasq+QW/5KUgcZ3CWpgwzuktRBBndJ6iCDuyR1kMFdkjrI4C5JHWRwl6QOMrhLUgcZ3CWpg9x+oAW8T6akcRncG65Nd36R1BytDe6zks1e7c4vXTxeSeVoZXCfpWy2TXd+kdQcrbyg2qb7GE7K+2RqUifOrrH/2Cl2HX2E/cdOceLsWt1T0hS0MrjPUjbrfTI1ictnuWsXL5G8cZZrgO++Vgb3WcpmD+1b4IE797IwP0cAC/NzU72ll8pTRwY9S2e5ulIra+733XbjyPsYdjWbbcudX7S+uq4TzdJZrq7UyszdbFZtU1cGPUtnubpSKzN3MJtVu9SVQc/aWa7e0MrMXWqbujJoz3JnV2szd6lN6sygPcudTQZ3XWFWVv5O2+X30PdW02Jw1+tmaeVvHcygNU3W3PU6e6Kl7igU3CPiQEScj4gLEXF0xOu/GhHnIuKpiPjriHhH+VNV1eyJlrpjw+AeEVuAB4H3AnuAuyJiz9Cws8BSZv4A8DngY2VPVNWzJ1rqjiKZ+83Ahcx8NjNfBh4GDg4OyMzHMvMb/Yenge3lTlPT4D42UncUuaC6ADw38HgVuOUq4+8B/mKSSakednRIG2tLR1mR4B4jnsuRAyPuBpaAH1nn9SPAEYDFxcWCU9Q02dEhra9NHWVFyjKrwI6Bx9uB54cHRcR7gA8Bd2Tmf4z6Rpl5PDOXMnNp27Ztm5mvJNWmTR1lRYL7GWB3ROyKiOuAw8Dy4ICI2Af8Ab3A/kL505Sk+rWpo2zDskxmvhoR9wIngS3AJzLzmYi4H1jJzGXg48C3AZ+NCICvZuYdFc5bkoDp1sCvn59jbUQgb2JHWaEVqpn5KPDo0HMfGfj6PSXPS5I2NO0aeJt22XSFqqTWmnYNvE27bLq3jKTWqqMG3paOMoO7GqPO/uG29C43SRPeszbVwKfNsowa4XLtdO3iJZI3aqfTuIl0nT+7rZrynrmqen0G9w46cXaN/cdOsevoI+w/dqoVQarO/uE29S43RVPeszbVwKfNskzHtGkF3aA6+4fb1LvcFE16z9pSA582M/eOaUpGBeOdQdS5I6W7YY7P96z5DO4d05SMatyabJ21U+u24/M9az6De8c0JaMa9wyiztqpddvx+Z41X2SO3OCxcktLS7myslLLz+6y4Zo79DKqaf/h7Tr6yMitQwP48rGfmNo8pK6JiCczc2mjcZ24oFpmv20Tencn0ZQ92e0/lurV+uBeZndIWztNhjWhe6BNe3BIXdT6mnuZ3SFN6jRpu0lrsm3s1ZeapPWZe5ndIU3pNOmKzZ5BdOUMSqpT6zP3MrtDmtJpMus8g5Im1/rgXma/rb27zeAZlDS51pdlyuwOaUqnyayro9Om7V1S0jD73NU40+7Vb8raAKmIon3urS/LqHumvfrRGr+6qPVlGXXTNHv1rfGri8zcNfPsklIXGdw18+ySUhdZltHMs0tKXWRwl2jGfjxSmQzumjp7yqXqGdw1Ve4bI02HwV0TGycTv1pPucFdKo/BXRMZNxO3p1yaDlshNZFxV3faUy5Nh8FdExk3E7enXJqOQsE9Ig5ExPmIuBARR0e8/i0R8en+609ExM6yJ6rJVXF3o3Ez8WnvGyPNqg1r7hGxBXgQ+HFgFTgTEcuZ
eW5g2D3A1zPz+yLiMPDbwE9XMWFtTlVdKpu5V6o95VL1imTuNwMXMvPZzHwZeBg4ODTmIPCH/a8/B7w7IqK8aWpSVe18aCYuNVORbpkF4LmBx6vALeuNycxXI+Il4DuBfx0cFBFHgCMAi4uLm5yyNqPKLhUzcal5imTuozLw4Tt8FBlDZh7PzKXMXNq2bVuR+akkdqlIs6VIcF8Fdgw83g48v96YiLgGeDvw72VMUOWwS0WaLUWC+xlgd0TsiojrgMPA8tCYZeDn+l+/DziVdd2/TyNZG5dmy4Y1934N/V7gJLAF+ERmPhMR9wMrmbkM/H/gjyLiAr2M/XCVk9bmWBuXZkeh7Qcy81Hg0aHnPjLw9TeBnyx3apKkzXKFqiR1kMFdkjrI4C5JHWRwl6QOMrhLUgcZ3CWpgwzuktRBUddC0oh4EfjnEr7VVoY2KOs4j7e7ZulYwePdrHdk5oabc9UW3MsSESuZuVT3PKbF4+2uWTpW8HirZllGkjrI4C5JHdSF4H687glMmcfbXbN0rODxVqr1NXdJ0pt1IXOXJA1pTXCPiAMRcT4iLkTE0RGvf0tEfLr/+hMRsXP6syxHgWP91Yg4FxFPRcRfR8Q76phnWTY63oFx74uIjIhWd1gUOd6I+Kn+7/iZiPiTac+xTAU+z4sR8VhEnO1/pm+vY55liIhPRMQLEfHFdV6PiPi9/nvxVES8s7LJZGbj/6N3k5B/Ar4HuA74B2DP0JhfAn6///Vh4NN1z7vCY/1R4Fv7X/9iW4+16PH2x70N+AJwGliqe94V/353A2eB7+g//q66513x8R4HfrH/9R7gK3XPe4Lj/WHgncAX13n9duAv6N13+lbgiarm0pbM/WbgQmY+m5kvAw8DB4fGHAT+sP/154B3R8SoG3c33YbHmpmPZeY3+g9P07uvbVsV+d0C/BbwMeCb05xcBYoc7weABzPz6wCZ+cKU51imIsebwLf3v347b75Hc2tk5he4+v2jDwKfyp7TwHxEfHcVc2lLcF8Anht4vNp/buSYzHwVeAn4zqnMrlxFjnXQPfQygbba8HgjYh+wIzP/fJoTq0iR3+8NwA0R8XhEnI6IA1ObXfmKHO9vAndHxCq9O7798nSmVotx/743rdBt9hpgVAY+3OZTZEwbFD6OiLgbWAJ+pNIZVeuqxxsRbwF+F3j/tCZUsSK/32volWbeRe+s7G8i4qbMvFjx3KpQ5HjvAj6Zmb8TET9E737MN2Xmf1Y/vambWpxqS+a+CuwYeLydN5+6vT4mIq6hd3p3tdOjpipyrETEe4APAXdk5n9MaW5V2Oh43wbcBHw+Ir5Cr0653OKLqkU/y3+Wma9k5peB8/SCfRsVOd57gM8AZObfAm+ltw9LFxX6+y5DW4L7GWB3ROyKiOvoXTBdHhqzDPxc/+v3AaeyfwWjZTY81n6Z4g/oBfY212Nhg+PNzJcyc2tm7szMnfSuMdyRmSv1THdiRT7LJ+hdNCcittIr0zw71VmWp8jxfhV4N0BEfD+94P7iVGc5PcvAz/a7Zm4FXsrMr1Xyk+q+ujzGVejbgX+kd+X9Q/3n7qf3hw69D8RngQvA3wHfU/ecKzzWvwL+Bfj7/n/Ldc+5yuMdGvt5WtwtU/D3G8D/BM4BTwOH655zxce7B3icXifN3wP/ve45T3CsDwFfA16hl6XfA/wC8AsDv9sH++/F01V+ll2hKkkd1JayjCRpDAZ3Seogg7skdZDBXZI6yOAuSR1kcJekDjK4S1IHGdwlqYP+Cye7VHEVkl26AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x21844443160>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(x[:,0], x[:,1])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4519587628865979"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(x[:,0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2810889351756117"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(x[:,0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 二、 均值方差归一化 standardization\n",
    "* 把所有数据归一到均值为0, 方差为1的分布中\n",
    "* 数据分布没有明显边界, 可能存在极端数据值\n",
    "### $x_{scale} = \\frac{x - x_{mean}}{S}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "x2 = np.random.randint(0,100,(50,2))\n",
    "x2 = np.array(x2, dtype=float)\n",
    "x2[:,0] = (x2[:,0] - np.mean(x2[:,0]))/np.std(x2[:,0])\n",
    "x2[:,1] = (x2[:,1] - np.mean(x2[:,1]))/np.std(x2[:,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAF1JJREFUeJzt3X+sX3V9x/HnaxWxMcaKLT964doyu2q1m9WbTmyyoaCFxtBadQP+EDZNwzay/7qUkOhiYlpHsk0Dm3aMiMsCbEQq2roKVsKWiXKxQEFgFoKj9xKpxWKIDVp874/vueN7L9/v937vPeee8znnvB7JTb8/Tr+f9z333Pv+/D6KCMzMrH1+q+oAzMysGk4AZmYt5QRgZtZSTgBmZi3lBGBm1lJOAGZmLeUEYGbWUk4AZmYt5QRgZtZSr6k6gEGWLl0aK1asqDoMM7PaeOCBB34WEcuGOTbpBLBixQrGx8erDsPMrDYk/WTYY90FZGbWUk4AZmYtVUgCkHSTpOckPdLn/fMlvSDpwezr00WUa2Zm81fUGMBXgOuBrw445j8j4sMFlWdmZjkV0gKIiHuB54v4LDMzK0eZYwDnSXpI0rckvaPfQZK2SRqXNH706NESwzMza5eypoH+EHhLRLwoaROwB1jV68CI2A3sBhgbG2vl7cr2HJzguv1PMHn8BMuXLGb7xtVsWTdSdVhA2rFZc/g6K0cpLYCI+EVEvJg93gecImlpGWXXzZ6DE1zztUNMHD9BABPHT3DN1w6x5+BE1aElHZs1h6+z8pSSACSdKUnZ4/VZucfKKLturtv/BCd+/fK01078+mWu2/9ERRG9IuXYrDl8nZWnkC4gSbcA5wNLJR0BPgOcAhARXwI+BvyZpJPACeDS8N3oe5o8fmJOr5cp5disOXydlaeQBBARl83y/vV0ponaLJYvWcxEjwt9+ZLFFUTz6hhSjc3SNdf+/KZfZymNb3glcGK2b1zN4lMWTXtt8SmL2L5xdUURvSLl2CxN8+nPb/J1ltr4RtKbwbXRVE0glRpCt4WMrepaUdXlN9Wg/vx+5zfl34G85nM+FpITQIK2rBtJ9mJfiNimakVTvxhTtaKp8hZa1eU32Xz781P+HcgjtfENdwFZ5aqe9VF1+U3Wr9++zP78PQcn2LDrACt37GXDrgOVTidN4Xx0cwKwylVdK6q6/Caruj8/tT73qs/HTE4AVrmqa0VVl99kW9aNsHPrWkaWLEbAyJLF7Ny6trTundRad1Wfj5k8BmCV275x9bQ+eCi3VlR1+U1XZX9+iq27lMY3nACsclXP+qi6fFs4TV9TkJdSXpA7NjYWviewmc3XzBle0GndVdntstAkPRARY8Mc6xaAmTWWW3eDOQGYWaOl1OeeGs8CMjNrKScAM7OWcgIwM2spJwAzs5byIHDDeZdLy8vXUHM5ATSYd7m0vHwNNZu7gBostX1QrH58DTWbE0CDpbgPitWLr6FmcwJoMO9yaXn5Gmq2QhKApJskPSfpkT7vS9IXJR2W9LCkdxdRrg2W2t7jVj++hpqtqBbAV4CLBrx/MbAq+9oG/GNB5doAqe09bvXja6jZCtsNVNIK4JsR8c4e730ZuCcibsmePwGcHxHPDvpM7wZqZjY3Ke4GOgI80/X8SPbawARgZlaWNq53KCsBqMdrPZsekrbR6SZidHR0IWMyMwPau96hrFlAR4Bzup6fDUz2OjAidkfEWESMLVu2rJTgrP72HJxgw64DrNyxlw27DlR202+rp7audygrAdwJfCKbDfRe4IXZ+v/NhjVVe5s4foLgldqbk4ANq63rHQrpApJ0C3A+sFTSEeAzwCkAEfElYB+wCTgM/BL4kyLKTUkb+w9TMaj25p+Br81htPXewYUkgIi4bJb3A/iLIspKUVv7D1PR1trbMHxtDmf7xtU97x3c9PUOXglcgLb2H6bCq1X787U5nLau
d/BuoAVwDbRaba29DcPX5vDaeO9gtwAK4BpotdpaexuGr00bxC2AArgGWr021t6G4WvTBnECKMDUHx7PtLDU+Nq0QQrbC2gheC8gM7O5SXEvIDMzG6CK9RpOAGZmFatqvYZnAZmZVayq9RpOAGZmFatqvYYTgJlZxapar+EEYGZWsaruvdz4QWDvhGhmqatqvUajE4B3QjSzuqhiNXujE4D3ia8Pt9TMytfoBOCdEOvBLTWzajR6ENg7IdaD96w3q0ajE0BVI+s2N26pmVWj0V1Ag0bW3eecjrbej7Uu/LvSXI1OANB7ZN19zmnxnvXp8u9KszW6C6gf9zmnxXf0qs6egxNs2HWAlTv2smHXAfYcnJj2vn9Xmq2QFoCki4AvAIuAGyNi14z3rwSuA6aurusj4sYiyp4P9zmnx3f0Kt8wtXv/rjRb7haApEXADcDFwBrgMklrehx6W0S8K/uq7I8/eHaQGQxXu/fvSrMV0QW0HjgcEU9FxK+AW4HNBXzugvHsILPhavf+XWm2IhLACPBM1/Mj2WszfVTSw5Jul3ROAeXOm/uczYar3ft3pdmKGANQj9dm3mj4G8AtEfGSpKuAm4EP9PwwaRuwDWB0dLSA8Hpzn7O13bCzr/y70lxFtACOAN01+rOBye4DIuJYRLyUPf0n4D39PiwidkfEWESMLVu2rIDwzKwX1+6tiBbA/cAqSSvpzPK5FLi8+wBJZ0XEs9nTS4DHCijXzHJy7b7dcieAiDgp6WpgP51poDdFxKOSPguMR8SdwF9KugQ4CTwPXJm3XDMzy0cRM7vr0zE2Nhbj4+NVh2FmVhuSHoiIsWGObeVKYDMzcwIwM2stJwAzs5ZyAjAza6nGbwftvcyL5fNp1hyNTgDey7xYPp9mzdLoLiDvZV4sn0+zZml0AvBe5sXy+TRrlkYnAO9lXiyfT7NmaXQC8F7mxfL5NGuWRg8CTw1MetZKMXw+zZrFewGZmTWI9wIyM7NZOQGYmbVUo8cAbDqv4jWzbk4ALeFVvGY2k7uAWsKreM1sJieAlvAqXjObyV1ALbF8yWImevyxn+sqXo8jmDWHWwAtUcQq3qlxhInjJwheGUfYc3Ci4GjNrAxuAbREEat4B40juBVQPrfGLK9CEoCki4AvAIuAGyNi14z3TwW+CrwHOAb8cUQ8XUTZNrwt60Zy/YHwOEI6PKvLipC7C0jSIuAG4GJgDXCZpDUzDvsk8POIeCvwd8Dn85Zr5WvrbqB7Dk6wYdcBVu7Yy4ZdB5Lo8vKsLitCEWMA64HDEfFURPwKuBXYPOOYzcDN2ePbgQskqYCyrURt3A001XEPt8asCEUkgBHgma7nR7LXeh4TESeBF4A3F1C2lWjLuhF2bl3LyJLFCBhZspidW9c2ussh1Zp2W1tjg6TYUktdEWMAvWryM7cYHeaYzoHSNmAbwOjoaL7IrHB5xxHqJtWa9vaNq6eNAUDzW2ODeExkfopoARwBzul6fjYw2e8YSa8B3gg83+vDImJ3RIxFxNiyZcsKCM9s/lKtabexNTZIqi211BXRArgfWCVpJTABXApcPuOYO4ErgO8BHwMORMo3IjDLpFzTbltrbJBUW2qpy50AIuKkpKuB/XSmgd4UEY9K+iwwHhF3Av8M/Iukw3Rq/pfmLdesDL4LWj0UtdK9bXxHMDOrvZljANBpqbWxW2wudwRr/Epgr5Y0az631Oan0QnAMwPM2sNjInPX6M3gPDPAzKy/RicAzwwwM+uv0Qkg1TncZmYpaHQCaOPeNWZmw2r0ILBnBsyPZ05Zynx9FqfRCQA8M2CuPHPKUubrs1iNTwA2Nwt9168yam+uITaX70pXLCcAm2YhZ06VUXtzDbHZPLOvWI0eBLa5W8iZU2Wsy/Daj2bzzL5iOQHYNAs5c6qM2ptriM3mmX3FcgKwaRZyn/kyam+uITab74NQLI8B2Kss1MypMvbWT3n/fiuGZ/YVxwnASlPGugyv/TAbnu8HYGbW
IHO5H4DHAMzMWsoJwMyspTwGUBGvVjWzqjkBVMCrVc0sBR4ErsCGXQeY6LEwaZHEbyLcIkiMW2tWJ6XdFF7SacBtwArgaeCPIuLnPY57GTiUPf3fiLgkT7l1129V6stZMnaLIB1urVmT5R0E3gF8JyJWAd/JnvdyIiLelX21+o8/DLcq1fvXpMF7C1mT5U0Am4Gbs8c3A1tyfl4r9NrPpBfvX1M97y1kTZY3AZwREc8CZP+e3ue410kal3SfpIFJQtK27Njxo0eP5gwvTTP3M1kk9TzO+9dUz3sLWZPNOgYg6W7gzB5vXTuHckYjYlLSucABSYci4sleB0bEbmA3dAaB51BGrXTvZzKznxm8f00qvLeQNdmsCSAiLuz3nqSfSjorIp6VdBbwXJ/PmMz+fUrSPcA6oGcCaCPvX5Mu/2zmxzOn6iHXNFBJ1wHHImKXpB3AaRHxVzOOeRPwy4h4SdJS4HvA5oj40Wyf39RpoGZN1q9F622by1HmXkC7gA9K+jHwwew5ksYk3Zgd83ZgXNJDwHeBXcP88TezevLMqfrItQ4gIo4BF/R4fRz4VPb4v4G1ecoxs/rwzKn68GZwZlYoz5yqDycAMyuU79tbH94MzswK5ZlT9eEEYGaF831768FdQGZmLeUEYGbWUk4AZmYt5QRgZtZSTgBmZi3lBGBm1lJOAGZmLeUEYGbWUk4AZmYt5QRgZtZSrdoKwncpMjN7RWsSwMy7FE0cP8E1XzsE4CRgZq3Umi4g36XIzGy61iQA36XIzGy61iQA36XIzGy61iQA36XIzGy61gwC+y5Fg3mGVP34Z2Z55UoAkj4O/DXwdmB9RIz3Oe4i4AvAIuDGiNiVp9z58l2KevMMqfrxz8yKkLcL6BFgK3BvvwMkLQJuAC4G1gCXSVqTs1wrkGdI1Y9/ZlaEXC2AiHgMQNKgw9YDhyPiqezYW4HNwI/ylG3F8Qyp+vHPzIpQxhjACPBM1/MjwO/3O1jSNmAbwOjo6MJGZkBnJtREjz8cniGVrib+zDymUb5Zu4Ak3S3pkR5fm4cso1fzIPodHBG7I2IsIsaWLVs2ZBGWh2dI1U/TfmZTYxoTx08QvDKmsefgRNWhNdqsLYCIuDBnGUeAc7qenw1M5vxMK5BnSNVP035mg8Y06vo9dUu1dVNGF9D9wCpJK4EJ4FLg8hLKtTnwDKn6adLPrMljGinP2Mo1C0jSRyQdAc4D9kran72+XNI+gIg4CVwN7AceA/4tIh7NF7aZNUmTV+qnPGMrVwKIiDsi4uyIODUizoiIjdnrkxGxqeu4fRHxOxHx2xHxubxBm1mzNG1Mo1vKrZvWbAVhZunasm6EnVvXMrJkMQJGlixm59a1lXeRFCHl1k1rtoIws7Q1aUyj2/aNq6eNAUA6rRsnALN5SnVmh6Ul5RlbTgBm85DyzA5LT6qtGycAq5VUat1Nn7du7eAEYLWRUq075ZkdZsPyLCCrjZTmU6c8s8NsWE4AVhsp1bqbPG/d2sNdQFYbKe2AmfLMjvlKZXylTG38nrs5AVhtpDafOtWZHfOR0vhKWdr4Pc/kLiCrjSavFq1aSuMrZWnj9zyTWwBWK02qdackpfGVsrTxe57JLQAza+WspjZ+zzM5AZgZ739b77vv9Xu9CTyTy11AZgZ89/Gjc3q9CZo4k2uunADMrLX94W0fU3IXkJm5P7ylnADMzP3hLeUuIDNzf3hLOQGYGeD+8DbK1QUk6eOSHpX0G0ljA457WtIhSQ9KGs9TppmZFSNvC+ARYCvw5SGOfX9E/CxneWZmVpBcCSAiHgOQVEw0ZmZWmrJmAQXwbUkPSNpWUplmZjbArC0ASXcDZ/Z469qI+PqQ5WyIiElJpwN3SXo8Iu7tU942YBvA6OjokB9vbdf2fd3N5mPWBBARF+YtJCIms3+fk3QHsB7omQAiYjewG2BsbCzylm3N533dzeZnwbuAJL1e0humHgMfojN4bFYI7+tuNj95p4F+RNIR
4Dxgr6T92evLJe3LDjsD+C9JDwE/APZGxH/kKdesW1v3sTHLK+8soDuAO3q8Pglsyh4/BfxennLMBknpXsFmdeK9gKz2vI+N2fx4KwirPe9jYzY/TgDWCN7Hxmzu3AVkZtZSTgBmZi3VyC4grwo1M5td4xKAV4WamQ2ncV1AXhVqZjacxiUArwo1MxtO4xJAv9WfXhVqZjZd4xKAV4WamQ2ncYPAXhVqNp1nxVk/jUsA4FWhZlM8K84GaWQCMKujhaipD5oV5wRgTgBmCViomrpnxdkgjRsENqujhVq/4llxNogTgFkCFqqm7llxNogTgFkCFqqmvmXdCDu3rmVkyWIEjCxZzM6ta93/b4DHAMySsH3j6mljAFBcTd2z4qwfJwCzBHj9ilXBCcAsEa6pW9lyjQFIuk7S45IelnSHpCV9jrtI0hOSDkvakadMMzMrRt5B4LuAd0bE7wL/A1wz8wBJi4AbgIuBNcBlktbkLNfMzHLKlQAi4tsRcTJ7eh9wdo/D1gOHI+KpiPgVcCuwOU+5ZmaWX5HTQP8U+FaP10eAZ7qeH8leMzOzCs06CCzpbuDMHm9dGxFfz465FjgJ/Guvj+jxWgwobxuwDWB0dHS28MzMbJ5mTQARceGg9yVdAXwYuCAiev1hPwKc0/X8bGByQHm7gd0AY2NjfROFmZnlo95/s4f8z9JFwN8CfxgRR/sc8xo6A8QXABPA/cDlEfHoEJ9/FPjJvAOcm6XAz0oqqyh1i7lu8UL9Yq5bvOCYi/aWiFg2zIF5E8Bh4FTgWPbSfRFxlaTlwI0RsSk7bhPw98Ai4KaI+Ny8C10gksYjYqzqOOaibjHXLV6oX8x1ixccc5VyLQSLiLf2eX0S2NT1fB+wL09ZZmZWLG8GZ2bWUk4Ar9hddQDzULeY6xYv1C/musULjrkyucYAzMysvtwCMDNrqdYmAEkfl/SopN9I6juaL+lpSYckPShpvMwYe8QybMxJbL4n6TRJd0n6cfbvm/oc93J2fh+UdGfZcWYxDDxnkk6VdFv2/vclrSg/ymnxzBbvlZKOdp3XT1URZ1c8N0l6TtIjfd6XpC9m38/Dkt5ddow9Ypot5vMlvdB1jj9ddoy5RUQrv4C3A6uBe4CxAcc9DSytOt5hY6Yz1fZJ4FzgtcBDwJqK4v0bYEf2eAfw+T7HvVjxeZ31nAF/Dnwpe3wpcFvi8V4JXF/leZ0Rzx8A7wYe6fP+JjpbyQh4L/D9GsR8PvDNquPM89XaFkBEPBYR+e64XbIhY05p873NwM3Z45uBLRXFMZthzln393I7cIGkXtuclCGln/FQIuJe4PkBh2wGvhod9wFLJJ1VTnS9DRFz7bU2AcxBAN+W9EC2T1HqUtp874yIeBYg+/f0Pse9TtK4pPskVZEkhjln/39MdHbAfQF4cynRvdqwP+OPZt0pt0s6p8f7KUnpup2L8yQ9JOlbkt5RdTBz1eg7gg2zkd0QNkTEpKTTgbskPZ7VDBZEATHPafO9vAbFO4ePGc3O8bnAAUmHIuLJYiIcyjDnrNTzOothYvkGcEtEvCTpKjqtlw8seGTzl9L5HdYP6Wy78GK228EeYFXFMc1JoxNAzLKR3ZCfMZn9+5ykO+g0vxcsARQQ85w238trULySfirprIh4NmvOP9fnM6bO8VOS7gHW0enjLssw52zqmCPZ/lZvpLrugVnjjYhjXU//Cfh8CXHlUep1W4SI+EXX432S/kHS0ohIdY+gV3EX0ACSXi/pDVOPgQ8BPWcEJOR+YJWklZJeS2fAspKZNVm5V2SPrwBe1YKR9CZJp2aPlwIbgB+VFmHHMOes+3v5GHAgspHACswa74z+80uAx0qMbz7uBD6RzQZ6L/DCVPdhqiSdOTUOJGk9nb+nxwb/r8RUPQpd1RfwETq1jpeAnwL7s9eXA/uyx+fSmWHxEPAonW6YpGPOnm+iswPrk1XGTKeP/DvAj7N/T8teH6OzWSDA+4BD2Tk+BHyyolhf
dc6AzwKXZI9fB/w7cBj4AXBuxdfCbPHuzK7Zh4DvAm+rON5bgGeBX2fX8CeBq4CrsvdF59axT2bXQd+ZeQnFfHXXOb4PeF/VMc/1yyuBzcxayl1AZmYt5QRgZtZSTgBmZi3lBGBm1lJOAGZmLeUEYGbWUk4AZmYt5QRgZtZS/wfjkaRCs//HLgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x21844473828>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(x2[:,0], x2[:,1])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.1102230246251566e-17"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(x2[:,0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(x2[:,1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 三、 sklearn中的scaler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试数据集归一化应该如下:\n",
    "### $\\frac{x\\_test\\ \\ -\\ \\ mean\\_train}{std\\_train}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "iris = datasets.load_iris()\n",
    "X = iris.data\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[5.1, 3.5, 1.4, 0.2],\n",
       "       [4.9, 3. , 1.4, 0.2],\n",
       "       [4.7, 3.2, 1.3, 0.2],\n",
       "       [4.6, 3.1, 1.5, 0.2],\n",
       "       [5. , 3.6, 1.4, 0.2]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[:5, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## scikit-learn中的StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "standard_scaler = StandardScaler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "StandardScaler(copy=True, with_mean=True, with_std=True)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "standard_scaler.fit(X_train) # 计算出了特征信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5.83416667, 3.0825    , 3.70916667, 1.16916667])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "standard_scaler.mean_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.81019502, 0.44076874, 1.76295187, 0.75429833])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "standard_scaler.scale_  # 标准差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.90616043,  0.94720873, -1.30982967, -1.28485856],\n",
       "       [-1.15301457, -0.18717298, -1.30982967, -1.28485856],\n",
       "       [-0.16559799, -0.64092567,  0.22169257,  0.17345038],\n",
       "       [ 0.45153738,  0.72033239,  0.95909217,  1.49918578],\n",
       "       [-0.90616043, -1.3215547 , -0.40226093, -0.0916967 ],\n",
       "       [ 1.43895396,  0.2665797 ,  0.56203085,  0.30602392],\n",
       "       [ 0.3281103 , -1.09467835,  1.07253826,  0.30602392],\n",
       "       [ 2.1795164 , -0.18717298,  1.63976872,  1.2340387 ],\n",
       "       [-0.78273335,  2.30846679, -1.25310662, -1.4174321 ],\n",
       "       [ 0.45153738, -2.00218372,  0.44858475,  0.43859746],\n",
       "       [ 1.80923518, -0.41404933,  1.46959958,  0.83631808],\n",
       "       [ 0.69839152,  0.2665797 ,  0.90236912,  1.49918578],\n",
       "       [ 0.20468323,  0.72033239,  0.44858475,  0.571171  ],\n",
       "       [-0.78273335, -0.86780201,  0.10824648,  0.30602392],\n",
       "       [-0.53587921,  1.40096142, -1.25310662, -1.28485856],\n",
       "       [-0.65930628,  1.40096142, -1.25310662, -1.28485856],\n",
       "       [-1.0295875 ,  0.94720873, -1.19638358, -0.7545644 ],\n",
       "       [-1.77014994, -0.41404933, -1.30982967, -1.28485856],\n",
       "       [-0.04217092, -0.86780201,  0.10824648,  0.04087684],\n",
       "       [-0.78273335,  0.72033239, -1.30982967, -1.28485856],\n",
       "       [-1.52329579,  0.72033239, -1.30982967, -1.15228502],\n",
       "       [ 0.82181859,  0.2665797 ,  0.78892303,  1.10146516],\n",
       "       [-0.16559799, -0.41404933,  0.27841562,  0.17345038],\n",
       "       [ 0.94524567, -0.18717298,  0.39186171,  0.30602392],\n",
       "       [ 0.20468323, -0.41404933,  0.44858475,  0.43859746],\n",
       "       [-1.39986872,  0.2665797 , -1.19638358, -1.28485856],\n",
       "       [-1.15301457,  0.03970336, -1.25310662, -1.4174321 ],\n",
       "       [ 1.06867274,  0.03970336,  1.07253826,  1.63175932],\n",
       "       [ 0.57496445, -0.86780201,  0.67547694,  0.83631808],\n",
       "       [ 0.3281103 , -0.64092567,  0.56203085,  0.04087684],\n",
       "       [ 0.45153738, -0.64092567,  0.61875389,  0.83631808],\n",
       "       [-0.16559799,  2.98909581, -1.25310662, -1.01971148],\n",
       "       [ 0.57496445, -1.3215547 ,  0.67547694,  0.43859746],\n",
       "       [ 0.69839152, -0.41404933,  0.33513866,  0.17345038],\n",
       "       [-0.90616043,  1.62783776, -1.02621444, -1.01971148],\n",
       "       [ 1.19209981, -0.64092567,  0.61875389,  0.30602392],\n",
       "       [-0.90616043,  0.94720873, -1.30982967, -1.15228502],\n",
       "       [-1.89357701, -0.18717298, -1.47999881, -1.4174321 ],\n",
       "       [ 0.08125616, -0.18717298,  0.78892303,  0.83631808],\n",
       "       [ 0.69839152, -0.64092567,  1.07253826,  1.2340387 ],\n",
       "       [-0.28902506, -0.64092567,  0.67547694,  1.10146516],\n",
       "       [-0.41245214, -1.54843104, -0.00519961, -0.22427024],\n",
       "       [ 1.31552689,  0.03970336,  0.67547694,  0.43859746],\n",
       "       [ 0.57496445,  0.72033239,  1.07253826,  1.63175932],\n",
       "       [ 0.82181859, -0.18717298,  1.18598435,  1.36661224],\n",
       "       [-0.16559799,  1.62783776, -1.13966053, -1.15228502],\n",
       "       [ 0.94524567, -0.41404933,  0.5053078 ,  0.17345038],\n",
       "       [ 1.06867274,  0.49345605,  1.12926131,  1.76433286],\n",
       "       [-1.27644165, -0.18717298, -1.30982967, -1.4174321 ],\n",
       "       [-1.0295875 ,  1.17408507, -1.30982967, -1.28485856],\n",
       "       [ 0.20468323, -0.18717298,  0.61875389,  0.83631808],\n",
       "       [-1.0295875 , -0.18717298, -1.19638358, -1.28485856],\n",
       "       [ 0.3281103 , -0.18717298,  0.67547694,  0.83631808],\n",
       "       [ 0.69839152,  0.03970336,  1.01581521,  0.83631808],\n",
       "       [-0.90616043,  1.40096142, -1.25310662, -1.01971148],\n",
       "       [-0.16559799, -0.18717298,  0.27841562,  0.04087684],\n",
       "       [-1.0295875 ,  0.94720873, -1.36655271, -1.15228502],\n",
       "       [-0.90616043,  1.62783776, -1.25310662, -1.15228502],\n",
       "       [-1.52329579,  0.2665797 , -1.30982967, -1.28485856],\n",
       "       [-0.53587921, -0.18717298,  0.44858475,  0.43859746],\n",
       "       [ 0.82181859, -0.64092567,  0.5053078 ,  0.43859746],\n",
       "       [ 0.3281103 , -0.64092567,  0.16496953,  0.17345038],\n",
       "       [-1.27644165,  0.72033239, -1.19638358, -1.28485856],\n",
       "       [-0.90616043,  0.49345605, -1.13966053, -0.88713794],\n",
       "       [-0.04217092, -0.86780201,  0.78892303,  0.96889162],\n",
       "       [-0.28902506, -0.18717298,  0.22169257,  0.17345038],\n",
       "       [ 0.57496445, -0.64092567,  0.78892303,  0.43859746],\n",
       "       [ 1.06867274,  0.49345605,  1.12926131,  1.2340387 ],\n",
       "       [ 1.68580811, -0.18717298,  1.18598435,  0.571171  ],\n",
       "       [ 1.06867274, -0.18717298,  0.84564608,  1.49918578],\n",
       "       [-1.15301457,  0.03970336, -1.25310662, -1.4174321 ],\n",
       "       [-1.15301457, -1.3215547 ,  0.44858475,  0.70374454],\n",
       "       [-0.16559799, -1.3215547 ,  0.73219998,  1.10146516],\n",
       "       [-1.15301457, -1.54843104, -0.2320918 , -0.22427024],\n",
       "       [-0.41245214, -1.54843104,  0.05152343, -0.0916967 ],\n",
       "       [ 1.06867274, -1.3215547 ,  1.18598435,  0.83631808],\n",
       "       [ 0.82181859, -0.18717298,  1.01581521,  0.83631808],\n",
       "       [-0.16559799, -1.09467835, -0.1186457 , -0.22427024],\n",
       "       [ 0.20468323, -2.00218372,  0.73219998,  0.43859746],\n",
       "       [ 1.06867274,  0.03970336,  0.56203085,  0.43859746],\n",
       "       [-1.15301457,  0.03970336, -1.25310662, -1.4174321 ],\n",
       "       [ 0.57496445, -1.3215547 ,  0.73219998,  0.96889162],\n",
       "       [-1.39986872,  0.2665797 , -1.36655271, -1.28485856],\n",
       "       [ 0.20468323, -0.86780201,  0.78892303,  0.571171  ],\n",
       "       [-0.04217092, -1.09467835,  0.16496953,  0.04087684],\n",
       "       [ 1.31552689,  0.2665797 ,  1.12926131,  1.49918578],\n",
       "       [-1.77014994, -0.18717298, -1.36655271, -1.28485856],\n",
       "       [ 1.56238103, -0.18717298,  1.2427074 ,  1.2340387 ],\n",
       "       [ 1.19209981,  0.2665797 ,  1.2427074 ,  1.49918578],\n",
       "       [-0.78273335,  0.94720873, -1.25310662, -1.28485856],\n",
       "       [ 2.54979762,  1.62783776,  1.52632263,  1.10146516],\n",
       "       [ 0.69839152, -0.64092567,  1.07253826,  1.36661224],\n",
       "       [-0.28902506, -0.41404933, -0.06192266,  0.17345038],\n",
       "       [-0.41245214,  2.53534313, -1.30982967, -1.28485856],\n",
       "       [-1.27644165, -0.18717298, -1.30982967, -1.15228502],\n",
       "       [ 0.57496445, -0.41404933,  1.07253826,  0.83631808],\n",
       "       [-1.77014994,  0.2665797 , -1.36655271, -1.28485856],\n",
       "       [-0.53587921,  1.8547141 , -1.13966053, -1.01971148],\n",
       "       [-1.0295875 ,  0.72033239, -1.19638358, -1.01971148],\n",
       "       [ 1.06867274, -0.18717298,  0.73219998,  0.70374454],\n",
       "       [-0.53587921,  1.8547141 , -1.36655271, -1.01971148],\n",
       "       [ 2.30294347, -0.64092567,  1.69649176,  1.10146516],\n",
       "       [-0.28902506, -0.86780201,  0.27841562,  0.17345038],\n",
       "       [ 1.19209981, -0.18717298,  1.01581521,  1.2340387 ],\n",
       "       [-0.41245214,  0.94720873, -1.36655271, -1.28485856],\n",
       "       [-1.27644165,  0.72033239, -1.02621444, -1.28485856],\n",
       "       [-0.53587921,  0.72033239, -1.13966053, -1.28485856],\n",
       "       [ 2.30294347,  1.62783776,  1.69649176,  1.36661224],\n",
       "       [ 1.31552689,  0.03970336,  0.95909217,  1.2340387 ],\n",
       "       [-0.28902506, -1.3215547 ,  0.10824648, -0.0916967 ],\n",
       "       [-0.90616043,  0.72033239, -1.25310662, -1.28485856],\n",
       "       [-0.90616043,  1.62783776, -1.19638358, -1.28485856],\n",
       "       [ 0.3281103 , -0.41404933,  0.56203085,  0.30602392],\n",
       "       [-0.04217092,  2.08159044, -1.42327576, -1.28485856],\n",
       "       [-1.0295875 , -2.45593641, -0.1186457 , -0.22427024],\n",
       "       [ 0.69839152,  0.2665797 ,  0.44858475,  0.43859746],\n",
       "       [ 0.3281103 , -0.18717298,  0.5053078 ,  0.30602392],\n",
       "       [ 0.08125616,  0.2665797 ,  0.61875389,  0.83631808],\n",
       "       [ 0.20468323, -2.00218372,  0.16496953, -0.22427024],\n",
       "       [ 1.93266225, -0.64092567,  1.35615349,  0.96889162]])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train = standard_scaler.transform(X_train)\n",
    "X_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.28902506, -0.18717298,  0.44858475,  0.43859746],\n",
       "       [-0.04217092, -0.64092567,  0.78892303,  1.63175932],\n",
       "       [-1.0295875 , -1.77530738, -0.2320918 , -0.22427024],\n",
       "       [-0.04217092, -0.86780201,  0.78892303,  0.96889162],\n",
       "       [-1.52329579,  0.03970336, -1.25310662, -1.28485856],\n",
       "       [-0.41245214, -1.3215547 ,  0.16496953,  0.17345038],\n",
       "       [-0.16559799, -0.64092567,  0.44858475,  0.17345038],\n",
       "       [ 0.82181859, -0.18717298,  0.84564608,  1.10146516],\n",
       "       [ 0.57496445, -1.77530738,  0.39186171,  0.17345038],\n",
       "       [-0.41245214, -1.09467835,  0.39186171,  0.04087684],\n",
       "       [ 1.06867274,  0.03970336,  0.39186171,  0.30602392],\n",
       "       [-1.64672287, -1.77530738, -1.36655271, -1.15228502],\n",
       "       [-1.27644165,  0.03970336, -1.19638358, -1.28485856],\n",
       "       [-0.53587921,  0.72033239, -1.25310662, -1.01971148],\n",
       "       [ 1.68580811,  1.17408507,  1.35615349,  1.76433286],\n",
       "       [-0.04217092, -0.86780201,  0.22169257, -0.22427024],\n",
       "       [-1.52329579,  1.17408507, -1.53672185, -1.28485856],\n",
       "       [ 1.68580811,  0.2665797 ,  1.29943044,  0.83631808],\n",
       "       [ 1.31552689,  0.03970336,  0.78892303,  1.49918578],\n",
       "       [ 0.69839152, -0.86780201,  0.90236912,  0.96889162],\n",
       "       [ 0.57496445,  0.49345605,  0.56203085,  0.571171  ],\n",
       "       [-1.0295875 ,  0.72033239, -1.25310662, -1.28485856],\n",
       "       [ 2.30294347, -1.09467835,  1.80993786,  1.49918578],\n",
       "       [-1.0295875 ,  0.49345605, -1.30982967, -1.28485856],\n",
       "       [ 0.45153738, -0.41404933,  0.33513866,  0.17345038],\n",
       "       [ 0.08125616, -0.18717298,  0.27841562,  0.43859746],\n",
       "       [-1.0295875 ,  0.2665797 , -1.42327576, -1.28485856],\n",
       "       [-0.41245214, -1.77530738,  0.16496953,  0.17345038],\n",
       "       [ 0.57496445,  0.49345605,  1.29943044,  1.76433286],\n",
       "       [ 2.30294347, -0.18717298,  1.35615349,  1.49918578]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_standard = standard_scaler.transform(X_test)\n",
    "X_test_standard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 用标准化后的X_train(上面已被transform的结果覆盖)去训练kNN模型\n",
    "from sklearn.neighbors import KNeighborsClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "knn_clf = KNeighborsClassifier(n_neighbors=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
       "           metric_params=None, n_jobs=1, n_neighbors=3, p=2,\n",
       "           weights='uniform')"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn_clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn_clf.score(X_test_standard, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3333333333333333"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn_clf.score(X_test, y_test) \n",
    "# 这里准确率只有33%, 主要是因为没有对测试数据进行归一化(标准化)处理, 而模型是用标准化后的训练数据拟合的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
